In [ ]:
import librosa
import librosa.display as lplt
import IPython
import matplotlib.pyplot as plt
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
Connect to Kaggle to download the dataset¶
In [ ]:
# Install the Kaggle CLI into the kernel's own environment (%pip rather than
# !pip, so the package lands where this notebook can actually use it).
%pip install -q kaggle
# -p makes this cell safe to re-run: a plain mkdir fails once the folder exists.
!mkdir -p ~/.kaggle
# kaggle.json (the API token) must already be in the working directory.
!cp kaggle.json ~/.kaggle/
# Restrict the token file to the current user (read/write for owner only).
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d shreyj1729/best-of-watkins-marine-mammal-sound-database
Dataset URL: https://www.kaggle.com/datasets/shreyj1729/best-of-watkins-marine-mammal-sound-database License(s): unknown Downloading best-of-watkins-marine-mammal-sound-database.zip to /content 100% 6.67G/6.68G [01:24<00:00, 77.9MB/s] 100% 6.68G/6.68G [01:24<00:00, 84.7MB/s]
In [ ]:
# Extract only the data/ tree from the archive. -n never overwrites existing
# files, so re-running this cell does not stop to ask about replacing them.
!unzip -q -n best-of-watkins-marine-mammal-sound-database.zip "data/*"
EDA (Exploratory data analysis)¶
In [ ]:
# Folder names that are kept when scanning the data directory; any folder
# whose name is not listed here is skipped.
best_of_watkins = {
    'AtlanticSpottedDolphin',
    'BeardedSeal',
    'Beluga_WhiteWhale',
    'BottlenoseDolphin',
    'Boutu_AmazonRiverDolphin',
    'BowheadWhale',
    'ClymeneDolphin',
    'CommonDolphin',
    'FalseKillerWhale',
    'Fin_FinbackWhale',
    "Fraser'sDolphin",
    "Grampus_Risso'sDolphin",
    'HarpSeal',
    'HumpbackWhale',
    'KillerWhale',
    'LeopardSeal',
    'Long_FinnedPilotWhale',
    'MelonHeadedWhale',
    'MinkeWhale',
    'Narwhal',
    'NorthernRightWhale',
    'PantropicalSpottedDolphin',
    'RossSeal',
    'Rough_ToothedDolphin',
    'Short_Finned(Pacific)PilotWhale',
    'SouthernRightWhale',
    'SpermWhale',
    'SpinnerDolphin',
    'StripedDolphin',
    'Walrus',
    'WeddellSeal',
    'White_beakedDolphin',
    'White_sidedDolphin',
}
In [ ]:
def fft_wrapper(signal, sampling_rate, return_val=False):
    """Compute the single-sided amplitude spectrum of a 1-D signal.

    Parameters
    ----------
    signal : sequence or np.ndarray
        Time-domain samples.
    sampling_rate : float
        Sampling rate in Hz; used only to label the frequency axis.
    return_val : bool, default False
        When True, the full (two-sided, complex) FFT is returned as a third
        element.

    Returns
    -------
    freqs : np.ndarray
        The first ``len(signal) // 2`` non-negative frequency bins, in Hz.
    fft_magnitude : np.ndarray
        Amplitude for each bin in ``freqs``. Non-DC bins are scaled by
        ``2 / N`` to fold in the discarded negative-frequency half; the DC bin
        has no mirror image and is scaled by ``1 / N``, so a constant offset
        ``c`` is reported as ``c`` rather than ``2 * c``.
    fft_values : np.ndarray
        Only when ``return_val`` is True: the raw output of ``np.fft.fft``.
    """
    n_samples = len(signal)

    # np.fft.fft rejects zero-length input; return empty spectra instead.
    if n_samples == 0:
        empty_freqs = np.array([], dtype=float)
        empty_magnitude = np.array([], dtype=float)
        if return_val:
            return empty_freqs, empty_magnitude, np.array([], dtype=complex)
        return empty_freqs, empty_magnitude

    fft_values = np.fft.fft(signal)
    nyquist = n_samples // 2

    # Keep the non-negative half and compensate for the dropped mirror half.
    fft_magnitude = np.abs(fft_values[:nyquist]) / (n_samples / 2)
    if nyquist > 0:
        # The DC component appears only once in the spectrum: undo the doubling.
        fft_magnitude[0] /= 2.0

    freqs = np.fft.fftfreq(n_samples, 1 / sampling_rate)[:nyquist]

    if return_val:
        return freqs, fft_magnitude, fft_values
    return freqs, fft_magnitude
In [ ]:
def teager_kaiser_energy(signal, axis=-1):
    """Teager-Kaiser energy operator: ``psi[n] = x[n]**2 - x[n-1] * x[n+1]``.

    The boundaries are handled by edge replication (``x[-1] := x[0]`` and
    ``x[N] := x[N-1]``) instead of wrapping around, so the first and last
    outputs are not computed from samples at the opposite end of the signal.

    Parameters
    ----------
    signal : array_like
        Input samples. May be N-dimensional.
    axis : int, default -1
        Axis along which neighbouring samples are taken. For 1-D input the
        default is the only axis.

    Returns
    -------
    np.ndarray
        Array with the same shape as ``signal``.
    """
    x = np.asarray(signal)

    # With fewer than two samples along the axis there is no distinct
    # neighbour; edge replication then reduces the operator to x**2 - x*x.
    if x.ndim == 0 or x.shape[axis] < 2:
        return x ** 2 - x * x

    # Work on the last axis so the neighbour slices are simple to express.
    x = np.moveaxis(x, axis, -1)
    previous = np.concatenate((x[..., :1], x[..., :-1]), axis=-1)
    following = np.concatenate((x[..., 1:], x[..., -1:]), axis=-1)
    energy = x ** 2 - previous * following

    # Restore the caller's axis order.
    return np.moveaxis(energy, -1, axis)
In [ ]:
# Folder that is scanned for one sub-folder per species.
main_dir = './data'

# One (folder name, path to its alphabetically first file) pair per kept folder.
dir_and_files = []
for subdir in sorted(os.listdir(main_dir)):
    if subdir in best_of_watkins:
        subdir_path = os.path.join(main_dir, subdir)
        # An entry with a matching name that is not a directory is ignored silently.
        if not os.path.isdir(subdir_path):
            continue
        files = sorted(os.listdir(subdir_path))
        if len(files) > 0:
            first_file = os.path.join(subdir_path, files[0])
            dir_and_files.append((subdir, first_file))
    else:
        # Anything not listed in best_of_watkins is reported and left out.
        print(f"Skipping '{subdir}'.")
Skipping 'BlueWhale'. Skipping 'Commerson'sDolphin'. Skipping 'Dall'sPorpoise'. Skipping 'DuskyDolphin'. Skipping 'FinlessPorpoise'. Skipping 'GraySeal'. Skipping 'GrayWhale'. Skipping 'HarborPorpoise'. Skipping 'HarbourSeal'. Skipping 'Heaviside'sDolphin'. Skipping 'HoodedSeal'. Skipping 'IrawaddyDolphin'. Skipping 'JuanFernandezFurSeal'. Skipping 'LongBeaked(Pacific)CommonDolphin'. Skipping 'NewZealandFurSeal'. Skipping 'RibbonSeal'. Skipping 'RingedSeal'. Skipping 'SeaOtter'. Skipping 'SpottedSeal'. Skipping 'StellerSeaLion'. Skipping 'TucuxiDolphin'. Skipping 'WestIndianManatee'.
In [ ]:
import soundfile as sf
# For every selected species: play the sample recording and draw four stacked
# views of it (waveform, amplitude spectrum, STFT spectrogram, TKEO of the
# power spectrogram).
for dir_name, audio_path in dir_and_files:
    # Inline audio player for the recording being plotted.
    display(IPython.display.Audio(audio_path))

    # sr=None asks librosa to keep the file's own sampling rate.
    y, sr = librosa.load(audio_path, sr=None)

    # Amplitude spectrum of the whole recording (the raw FFT is not needed here).
    freqs, fft_magnitude = fft_wrapper(y, sr)

    # The STFT magnitude is computed once and reused for the dB and power views.
    S = librosa.stft(y=y, n_fft=2048, hop_length=512, window='hann')
    S_mag = np.abs(S)
    S_dB = librosa.amplitude_to_db(S_mag, ref=np.max)
    S_pow = S_mag**2

    # Apply the TKEO along time, one frequency bin (row) at a time. Running it
    # on the flattened matrix would make the last frame of one bin a neighbour
    # of the first frame of the next bin.
    tkeo_on_spectrogram = np.apply_along_axis(teager_kaiser_energy, 1, S_pow)

    fig, axes = plt.subplots(4, 1, figsize=(15, 20))

    # Waveform
    librosa.display.waveshow(y, sr=sr, ax=axes[0])
    axes[0].set_title(f'Waveform, {dir_name} sound')
    axes[0].set_xlabel('Time (s)')
    axes[0].set_ylabel('Amplitude (V)')

    # Amplitude Spectrum
    axes[1].stem(freqs, fft_magnitude, basefmt=" ")
    axes[1].set_title(f'Amplitude Spectrum, {dir_name} sound')
    axes[1].set_xlabel('Frequency (Hz)')
    axes[1].set_ylabel('Amplitude (V)')
    axes[1].grid(True)

    # STFT Spectrogram
    img = librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='linear', cmap='viridis', ax=axes[2])
    fig.colorbar(img, ax=axes[2], format='%+2.0f dB')
    axes[2].set_title(f'STFT Spectrogram, {dir_name}')
    axes[2].set_xlabel('Time (s)')
    axes[2].set_ylabel('Frequency (Hz)')

    # TKEO Applied to STFT Spectrogram
    img_tkeo = librosa.display.specshow(librosa.amplitude_to_db(tkeo_on_spectrogram, ref=np.max),
                                        sr=sr, x_axis='time', y_axis='linear', cmap='viridis', ax=axes[3])
    fig.colorbar(img_tkeo, ax=axes[3], format='%+2.0f dB')
    axes[3].set_title('TKEO Applied to STFT Spectrogram')
    axes[3].set_xlabel('Time (s)')
    axes[3].set_ylabel('Frequency (Hz)')

    plt.tight_layout()
    plt.show()
    # Release the figure explicitly so memory does not grow with every species.
    plt.close(fig)